{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a348c3a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, json, requests, ast, datetime\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import geopandas as gpd\n",
    "import geopy.distance as geodist\n",
    "\n",
    "os.chdir(\"/Users/xiaosongw/Dropbox/Research/InformedSources/Replication/Build\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37d1630c",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf0_ucl = gpd.read_file(\"./Input/1270055004_ucl_2016_aust_shape/UCL_2016_AUST.shp\")\n",
    "gdf0_ucl.columns = gdf0_ucl.columns.str.lower()\n",
    "gdf0_ucl['ucl_code16'] = gdf0_ucl['ucl_code16'].astype(int)\n",
    "gdf0_ucl = gdf0_ucl[gdf0_ucl['areasqkm16']>1].copy()\n",
    "gdf0_ucl.reset_index(drop=True, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9f4bca9",
   "metadata": {},
   "outputs": [],
   "source": [
    "d_bnd = gdf0_ucl.loc[\n",
    "    gdf0_ucl['ucl_name16']=='Melbourne', 'geometry'].bounds.reset_index(drop=True).transpose()[0].to_dict()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9b61796-a226-4177-a3be-908c130da2d3",
   "metadata": {},
   "source": [
    "# Scrape PetrolSpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0633077d",
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_lng = np.arange(d_bnd['minx']-0.1, d_bnd['maxx']+0.1, 0.09)\n",
    "grid_lat = np.arange(d_bnd['miny']-0.1, d_bnd['maxy']+0.1, 0.09)\n",
    "\n",
    "out = []\n",
    "for i in range(len(grid_lat)-1):\n",
    "    for j in range(len(grid_lng)-1):\n",
    "        nelat = grid_lat[i+1]\n",
    "        swlat = grid_lat[i]\n",
    "        nelng = grid_lng[j+1]\n",
    "        swlng = grid_lng[j]\n",
    "        url = f\"https://petrolspy.com.au/webservice-1/station/box?neLat={nelat}&neLng={nelng}&swLat={swlat}&swLng={swlng}\"\n",
    "        content = json.loads(requests.get(url=url).content)\n",
    "        out.append(content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3519ff38",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_out = pd.DataFrame()\n",
    "for item in out:\n",
    "    if len(item['message']['list'])>0:\n",
    "        df_out = pd.concat([df_out, pd.DataFrame(item['message']['list'])], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42829ee8",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_out.to_csv(\"./Output/petrolspy_stations_raw.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b82b313f-04b7-4917-b2b0-e2cf51a41819",
   "metadata": {},
   "source": [
    "# Select Active Melbourne Station"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11ce7d59-675a-4c43-b2f7-efbff8b12f5e",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf0_ucl = gpd.read_file(\"./Input/1270055004_ucl_2016_aust_shape/UCL_2016_AUST.shp\")\n",
    "gdf0_ucl.columns = gdf0_ucl.columns.str.lower()\n",
    "gdf0_ucl['ucl_code16'] = gdf0_ucl['ucl_code16'].astype(int)\n",
    "gdf0_ucl = gdf0_ucl[gdf0_ucl['areasqkm16']>1].copy()\n",
    "gdf0_ucl.reset_index(drop=True, inplace=True)\n",
    "\n",
    "gdf_mel = gdf0_ucl[gdf0_ucl['ucl_name16']=='Melbourne'].reset_index(drop=True)\n",
    "display(gdf_mel)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d7ee689-6c4d-4af7-bce9-39c6a2049498",
   "metadata": {},
   "outputs": [],
   "source": [
    "df0 = pd.read_csv(\"./Output/petrolspy_stations_raw.csv\")\n",
    "df = df0.drop(['id', 'autoUpdated', 'mappingOption', 'verified', 'icon', 'brandIcon',\n",
    "              'supportedPetrol', 'ausFuelCard', 'shellCard', 'eftops', 'truckpark', 'state', 'country'], axis=1)\n",
    "df['lng'] = df['location'].apply(lambda x: ast.literal_eval(x)['x'])\n",
    "df['lat'] = df['location'].apply(lambda x: ast.literal_eval(x)['y'])\n",
    "df.drop('location', axis=1, inplace=True)\n",
    "print(df.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3efaf2ce-0714-4ed3-880a-2d84bb87412e",
   "metadata": {},
   "outputs": [],
   "source": [
    "df['u91'] = df['prices'].apply(lambda x: json.dumps(ast.literal_eval(x)['U91'])\n",
    "                       if 'U91' in ast.literal_eval(x).keys() else '')\n",
    "df['p'] = df['u91'].apply(lambda x: json.loads(x)['amount'] if len(x)>0 else np.nan)\n",
    "df['t'] = df['u91'].apply(lambda x: json.loads(x)['updated'] if len(x)>0 else np.nan)\n",
    "df['t'] = df['t'].apply(lambda x: datetime.datetime.fromtimestamp(x / 1e3) if x>0 else np.nan)\n",
    "\n",
    "t0 = datetime.datetime.strptime('2023-02-17', '%Y-%m-%d')\n",
    "df['is_active'] = (df['t']>=(t0-datetime.timedelta(days=30))).astype(int)\n",
    "df['coors'] = gpd.points_from_xy(df.lng, df.lat)\n",
    "df['is_metro'] = df['coors'].apply(lambda x: x.within(gdf_mel.loc[0, 'geometry'])).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21562186-e278-4ce4-8cf0-3d6528a692da",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_mel = df[(df['is_metro']==1)&(df['is_active']==1)].copy()\n",
    "df_mel['id'] = df_mel.groupby('address').ngroup()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8ee0212-2d48-4a8d-97f9-eff0f4fa0701",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_mel.to_csv(\"./Output/petrolspy_stations_mel.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8cba1451-317e-4601-99eb-32f394984f4f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
